from bs4 import BeautifulSoup
import pandas as pd
import requests

# This project scrapes attraction information via the service provided by the Qunar travel site (travel.qunar.com).
# Send a request to the site and return the parsed response body.
def get_static_url_content(url, timeout=10):
    """Fetch *url* and return its HTML parsed with BeautifulSoup.

    Parameters
    ----------
    url : str
        Address of the page to fetch.
    timeout : float, optional
        Seconds to wait for the server (default 10). The original code
        had no timeout, so a stalled connection could hang the crawler
        indefinitely.

    Returns
    -------
    BeautifulSoup
        Document tree built with the 'lxml' parser.

    Raises
    ------
    requests.HTTPError
        If the server responds with a 4xx/5xx status — previously an
        error page would be parsed silently and yield empty results.
    """
    # Spoof a desktop-browser User-Agent so the site serves the normal page.
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0'}
    resp = requests.get(url, headers=headers, timeout=timeout)
    resp.raise_for_status()
    return BeautifulSoup(resp.text, 'lxml')

# Parse the needed information out of the site's response page.
def get_city_id(url='http://travel.qunar.com/place/'):
    """Scrape the Qunar place index and collect destination names and links.

    Parameters
    ----------
    url : str, optional
        Index page to scrape; defaults to Qunar's place directory, which
        was previously hard-coded inside the function.

    Returns
    -------
    tuple[list[str], dict[str, str]]
        ``(names, links)`` where *names* preserves page order and *links*
        maps each destination name to its ``href`` attribute value.
    """
    bs_obj = get_static_url_content(url)
    names = []
    links = {}
    # Each 'sub_list' div groups the anchors for one region of destinations.
    for sub_list in bs_obj.find_all('div', attrs={'class': 'sub_list'}):
        for anchor in sub_list.find_all('a'):
            name = anchor.text
            names.append(name)
            # The original bound this to `id`, shadowing the builtin;
            # the href serves as the destination's identifier downstream.
            links[name] = anchor.attrs['href']
    return names, links


